"""
This is the main Triplify script.

@version Id:
@license LGPL
@copyright 2008 Remon Georgy (remon.sherin@gmail.com)
"""

import hashlib
import re
import urllib
import simplejson as json
import config
import time

from mod_python import util
from config import triplify
from os import path
from MySQLdb import constants

# Request-scoped URI state. index() reassigns all four on every request and
# Tripleizer reads them through `global` statements.
baseURI = serverURI = restPath = requestURI = ''

# NOTE(review): not referenced anywhere else in the visible part of this file.
buffered = False

class Tripleizer:
    """Turn the rows returned by the configured SQL queries into RDF triples.

    Triples are written either as N-Triples lines (streamed to the request,
    or buffered in ``output`` when caching is enabled) or collected in
    ``json`` when the request asks for ``t-output=json``.
    """

    # Upper bound for the number of rows per query; 0 disables the cap.
    maxResults = 0
    version = 'V0.4'
    # Class-level fallbacks only: __init__ gives every instance its own
    # containers so state cannot leak between requests served by one process.
    json = {}
    typed = {}
    output = ''

    def __init__(self, tConfig, req):
        """ Constructor
        tConfig -- dictionary of configuration parameters (must contain 'db')
        req -- apache's request object

        Raises Exception when tConfig has no 'db' entry.
        """
        global restPath
        if 'db' not in tConfig:
            req.write('DB configuration is missing')
            raise Exception('DB configuration is missing')
        self.db = tConfig['db']
        self.cursor = self.db.cursor()
        self.config = tConfig
        # Mappings default to empty dicts because they are queried with
        # .get() / `in` further down.
        self.ns = tConfig.get('namespaces') or {}
        self.objectProperties = tConfig.get('objectProperties') or {}
        self.classMap = tConfig.get('classMap') or {}
        self.restPath = restPath.strip('/').split('/')
        self.curDesc = None
        self.req = req
        # Per-instance state (see the class-level comment above).
        self.json = {}
        self.typed = {}
        self.output = ''
        self._fieldStorage = None

    def _fields(self):
        """Return the parsed request parameters, parsing them only once."""
        fields = getattr(self, '_fieldStorage', None)
        if fields is None:
            fields = self._fieldStorage = util.FieldStorage(self.req)
        return fields

    @staticmethod
    def _toStr(value):
        """Return value as a native string without choking on non-ASCII text."""
        if isinstance(value, str):
            return value
        try:
            return str(value)
        except UnicodeEncodeError:
            # Python 2 unicode object holding non-ASCII characters.
            return value.encode('utf8')

    @staticmethod
    def _toInt(value):
        """Return value as a non-negative int; anything unparsable becomes 0."""
        try:
            number = int(value)
        except (TypeError, ValueError):
            return 0
        return max(number, 0)

    @staticmethod
    def _escapeLiteral(text):
        """Escape backslash, CR, LF and double quote for an N-Triples literal."""
        text = Tripleizer._toStr(text)
        # The backslash must be handled first so the escapes added below are
        # not escaped a second time.
        return (text.replace('\\', '\\\\')
                    .replace('\r', '\\r')
                    .replace('\n', '\\n')
                    .replace('"', '\\"'))

    @staticmethod
    def _selectColumns(depth, c, idN):
        """Return 'id' when only resource ids are wanted, '*' otherwise."""
        if (depth == 1 and c is None) or (depth == 2 and idN is None):
            return 'id'
        return '*'

    @staticmethod
    def _orderClause(order):
        """Return an ORDER BY clause for a whitelisted t-order value, else ''."""
        if isinstance(order, str) and re.match(r'[A-Za-z0-9: ,]+\Z', order):
            return ' ORDER BY ' + order
        return ''

    def _limitClause(self, start, results):
        """Return the LIMIT clause for the t-start / t-results parameters.

        Both values come from the request, so they are coerced to
        non-negative integers before they reach the SQL text.
        """
        start = self._toInt(start)
        results = self._toInt(results)
        if results > 0:
            if self.maxResults > 0:
                limit = min(results, self.maxResults)
            else:
                limit = results
        else:
            limit = self.maxResults
        if start:
            return ' LIMIT %d,%d' % (start, limit or 20)
        if limit:
            return ' LIMIT %d' % limit
        return ''

    def bufWrite(self, message, req):
        """Buffer message in self.output when caching is on, else stream it.

        Buffering happens only for a positive TTL, the same condition under
        which index() writes the cache file and flushes the buffer.
        """
        if triplify.get('TTL', 0) > 0:
            self.output += message
        else:
            req.write(message)

    def tripleize(self, queries, c=None, idN=None):
        """Run the configured queries and emit their rows as triples.

        queries -- mapping of class name to one SQL string or a list of them
        c -- restrict the run to this class (None means every class)
        idN -- restrict the run to the row with this id
        """
        global serverURI, requestURI
        selfURI = serverURI + requestURI
        self.writeTriple(selfURI, self.uri('owl:imports'), self.ns.get('vocabulary', ''))
        self.writeTriple(selfURI, self.uri('rdfs:comment'), 'Generated by Triplify '+self.version+' (http://Triplify.org)', True)
        if 'license' in self.config:
            self.writeTriple(selfURI, 'http://creativecommons.org/ns#license', self.config['license'])

        metadata = self.config.get('metadata')
        if isinstance(metadata, list):
            for key, value in metadata:
                if value:
                    self.writeTriple(selfURI, self.uri(key), value, True)

        fields = self._fields()
        depth = self.config.get('LinkedDataDepth', 0)
        clauses = {
            1: ' AND YEAR(id)=',
            2: ' AND MONTH(id)=',
            3: ' AND DAY(id)=',
            4: ' AND HOUR(id)=',
            5: ' AND MINUTE(id)=',
            6: ' AND SECOND(id)=',
        }

        for rclass in queries.keys():
            if c is not None and c != rclass:
                continue
            classQueries = queries[rclass]
            if not isinstance(classQueries, list):
                classQueries = [classQueries]

            for query in classQueries:
                where = ''
                groupBy = ''
                if rclass == 'update':
                    parts = len(self.restPath)
                    prefix = 'SUBSTR(id,1,' + str(parts * 3 + 1) + ')'
                    cols = '*' if parts == 7 else prefix
                    # Path parts come from the request: quote them, and ignore
                    # anything beyond the six date/time components.
                    for i in range(1, min(parts, 7)):
                        where += clauses[i] + self.dbQuote(self.restPath[i])
                    if parts != 7:
                        groupBy = ' GROUP BY ' + prefix
                elif depth == 2 and c is None:
                    self.writeTriple(self.uri(rclass), self.uri('rdf:type'), self.uri('owl:Class'))
                    # NOTE(review): this stops after the first class, as the
                    # original did; confirm whether every class should be listed.
                    return
                else:
                    cols = self._selectColumns(depth, c, idN)

                if rclass and not idN and len(fields.keys()) > 0:
                    for key in fields.keys():
                        if key[0:2] != 't-' and key.find('`') == -1 and key != 'q':
                            where += ' AND `' + key + '`=' + self.dbQuote(fields[key])
                if idN:
                    where += ' AND id=' + self.dbQuote(idN)

                # GROUP BY has to follow every WHERE condition to stay valid SQL.
                sql = ('SELECT ' + cols + ' FROM (' + query + ') t WHERE 1'
                       + where + groupBy
                       + self._orderClause(fields.get('t-order', ''))
                       + self._limitClause(fields.get('t-start', 0),
                                           fields.get('t-results', 0)))

                cursor = self.db.cursor()
                try:
                    self.dbQuery(sql, cursor)
                    dtype = self.dbDtypes(cursor)
                    rows = self.dbFetchAll(cursor)
                finally:
                    cursor.close()

                for row in rows:
                    # makeTriples pops from the row, which tuples cannot do.
                    self.makeTriples(list(row), rclass, dtype)

                self.typed[rclass] = True
                if cols == 'id':
                    break

    def writeTriple(self, subject, predicate, objectN, isLiteral=False, dtype=None, lang=None):
        """Emit one triple, as JSON data or as an N-Triples line.

        In JSON mode every subject/predicate pair keeps a list of object
        dictionaries, so repeated predicates and additional predicates of a
        subject accumulate instead of replacing earlier entries.
        """
        if self._fields().get('t-output', '') == 'json':
            oa = {'value': objectN, 'type': ('literal' if isLiteral else 'uri')}
            if isLiteral and dtype:
                oa['datatype'] = dtype
            elif isLiteral and lang:
                oa['language'] = lang
            self.json.setdefault(subject, {}).setdefault(predicate, []).append(oa)
            return

        if isLiteral:
            if dtype:
                suffix = '^^<' + dtype + '>'
            elif lang:
                suffix = '@' + lang
            else:
                suffix = ''
            objectN = '"' + self._escapeLiteral(objectN) + '"' + suffix
        else:
            objectN = '<' + objectN + '>'
        self.bufWrite('<' + subject + '> <' + predicate + '> ' + objectN + ' .\n', self.req)

    def dbResultMap(self, result):
        """Pair each value of a row with its column name from the last query."""
        return [(d[0], value) for d, value in zip(self.curDesc, result)]

    def uri(self, name, default=None):
        """Expand name to a full URI.

        Names containing '://' are returned unchanged, 'prefix:local' names
        are resolved through the configured namespaces, and anything else is
        appended to default (or to the global baseURI when default is empty).
        """
        global baseURI
        name = self._toStr(name)
        if name.find('://') > -1:
            return name
        colon = name.find(':')
        if colon > -1:
            return self.ns.get(name[:colon], '') + name[colon + 1:]
        return (default if default else baseURI) + name

    def dbQuote(self, string):
        """Return string as a quoted SQL literal using the connection."""
        return self.db.literal(string)

    def dbQuery(self, query, cursor):
        """Execute query on cursor and remember its column description."""
        cursor.execute(query)
        self.curDesc = cursor.description
        # a more specific return
        return True

    def dbFetch(self, cursor):
        """Return the next row of cursor."""
        return cursor.fetchone()

    def dbFetchAll(self, cursor):
        """Return all remaining rows of cursor."""
        return cursor.fetchall()

    def dbDtypes(self, cursor):
        """Map the name of every DATETIME column to 'xsd:dateTime'."""
        dtype = {}
        for field_desc in cursor.description:
            if field_desc[1] == constants.FIELD_TYPE.DATETIME:
                dtype[field_desc[0]] = 'xsd:dateTime'
        return dtype

    def makeTriples(self, cl, rclass, dtypes):
        """Write the triples described by one result row.

        cl -- row values as a list; the first column identifies the resource
        rclass -- class (query group) the row belongs to
        dtypes -- column name to XSD datatype mapping from dbDtypes()
        """
        cl = self.dbResultMap(cl)
        rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        ipref = self.uri(rclass + '/')
        uri = cl.pop(0)[1]

        if rclass == 'update' and uri:
            key, value = None, None
            if cl:
                key, value = cl[0]
            # Only the '#...' suffix depends on a second column. Collection
            # queries select a single SUBSTR(id, ...) column and must still
            # keep their id.
            suffix = ''
            if key and value:
                # NOTE(review): the slice skips two characters after the first
                # '.', as in the original; confirm the column-alias convention.
                suffix = '#' + key[key.find('.'):][2:] + self._toStr(value)
            uri = re.sub('[^0-9]', '/', self._toStr(uri)) + suffix

        if not uri:
            # No usable id: derive a stable one from the remaining columns.
            # This has to happen before the subject is built.
            joined = ''.join([self._toStr(v) for (_, v) in cl])
            digest = hashlib.md5()
            try:
                digest.update(joined)
            except TypeError:
                # Text strings must be encoded before hashing on Python 3.
                digest.update(joined.encode('utf8'))
            uri = digest.hexdigest()
        subject = self.uri(uri, ipref)

        vocabulary = self.ns.get('vocabulary', '')
        if rclass and not self.typed.get(rclass, None):
            if rclass == 'update':
                if len(self.restPath) == 7:
                    c = 'http://triplify.org/vocabulary/update#Update'
                else:
                    c = 'http://triplify.org/vocabulary/update#UpdateCollection'
            else:
                c = self.classMap.get(rclass) or rclass
            self.writeTriple(subject, rdf_ns + 'type', self.uri(c, vocabulary))

        objectProperties = self.objectProperties or {}
        callbacks = self.config.get('CallbackFunctions') or {}
        for (p, val) in cl:
            # Skip empty values and MySQL's zero date.
            if not val:
                continue
            if dtypes.get(p) == 'xsd:dateTime' and val == '0000-00-00 00:00:00':
                continue

            dtype = None
            lang = None
            if p.find('^^') > -1:
                dtype = self.uri(p[p.find('^^') + 2:])
                p = p[:p.find('^')]
            elif dtypes.get(p, None):
                dtype = self.uri(dtypes[p])
            elif p.find('@') > -1:
                # The language tag is whatever follows the '@'.
                lang = p[p.find('@') + 1:]
                p = p[:p.find('@')]

            if p.find('.') > -1:
                # NOTE(review): skips two characters after the '.', as in the
                # original; confirm the intended separator.
                objectProperty = p[p.find('.'):][2:]
                p = p[:p.find('.')]
            elif p in objectProperties:
                objectProperty = objectProperties[p]
            else:
                objectProperty = None

            if p in callbacks:
                conMethod = getattr(config, callbacks[p], None)
                if callable(conMethod):
                    val = conMethod(val)

            val = self._toStr(val)

            prop = self.uri(p, vocabulary)
            if objectProperty:
                isLiteral = False
                separator = '' if objectProperty.endswith('/') else '/'
                objectN = self.uri(objectProperty + separator + val)
            else:
                isLiteral = True
                if dtypes.get(p) == 'xsd:dateTime':
                    objectN = val.replace(' ', 'T')
                else:
                    objectN = val

            self.writeTriple(subject, prop, objectN, isLiteral, dtype, lang)

def index(req):
    """mod_python entry point: serve the triples for the requested resource.

    req -- apache's request object

    Reads the 'q' (resource path) and 't-output' (format) request
    parameters, answers from the cache file when it is younger than the
    configured TTL, optionally registers this endpoint once, and otherwise
    runs the Tripleizer and writes (and, for a positive TTL, caches) its
    output.
    """
    global baseURI, serverURI, restPath, requestURI
    # Local import keeps the file's import block untouched.
    from xml.sax.saxutils import escape

    write = req.write
    server = req.server
    pRequest = util.FieldStorage(req)
    dType = pRequest.get('t-output', 'plain')
    rclass = None
    rinstance = None

    serverURI = 'http://' + req.hostname + ("" if (server.port == 80) else ":"+str(server.port))
    baseURI = serverURI + req.uri.replace('index.py','')
    restPath = pRequest.get('q', '')
    requestURI = req.unparsed_uri.replace('index.py?q=','').replace('&','?')
    req.content_type = 'text/html'

    if restPath:
        r = restPath.split('/')
        rclass = r.pop(0)
        if rclass != 'update' and len(r) == 1:
            rinstance = r.pop()
        if (rclass != 'update' and r) or not rclass or rclass not in triplify['queries']:
            # Both names come straight from the request and the page is served
            # as text/html, so they are escaped before being echoed back.
            shown = escape(str(rinstance)) if rinstance else "\"NotSpecfied\" "
            write("<h1>Error 404</h1>Resource:" + shown + "of type:\"" + escape(rclass) + "\" not found!")
            return

    if dType == 'json':
        req.content_type = 'text/javascript'
    elif dType == 'plain':
        req.content_type = 'text/plain'
    else:
        req.content_type = 'text/rdf'

    # Caching: one file per request URI, named by the URI's md5 digest.
    basePath = path.dirname(__file__)
    m = hashlib.md5()
    m.update(requestURI)
    cacheFileName = m.hexdigest()

    cacheFileAbs = path.join(basePath, triplify['cachedir'], cacheFileName)

    if path.exists(cacheFileAbs) and path.getmtime(cacheFileAbs) > int(time.time()) - triplify.get('TTL', 0):
        cacheFile = open(cacheFileAbs)
        try:
            write(cacheFile.read())
        finally:
            cacheFile.close()
        return

    # One-time registration; the marker file is the same path that is
    # checked here, so the remote service is not contacted on every request.
    registrationFile = path.join(basePath, triplify['cachedir'], 'registered')
    if not path.exists(registrationFile) and triplify.get('register'):
        url = 'http://triplify.org/register/?'+urllib.urlencode({'url':baseURI, 'type': triplify['namespaces'].get('vocabulary', '')})
        try:
            registeringPage = urllib.urlopen(url)
            registeringPage.close()
            registerfile = open(registrationFile, 'w')
            try:
                registerfile.write('')
            finally:
                registerfile.close()
        except IOError:
            # Registration is best effort; it is retried on a later request.
            pass

    t = Tripleizer(triplify, req)

    t.tripleize(triplify['queries'], rclass, rinstance)
    if dType == 'json':
        t.bufWrite(json.JSONEncoder().encode(t.json), req)
    if triplify.get('TTL', 0) > 0:
        cacheFile = open(cacheFileAbs, 'w')
        try:
            cacheFile.write(t.output)
        finally:
            cacheFile.close()
    # Flush whatever was buffered, even when no cache file was written.
    if t.output:
        req.write(t.output)